ps <- read.csv(file.path(data_folder, ps_folder, "pupil_data_with_demo_20220926.csv"))
ps$demo_gender <- factor(ps$demo_gender,
levels = c(1,2),
labels = c("Male", "Female"))
ps$user_cat <- NA
ps$user_cat[ps$user_type == "non-user"] <- 0
ps$user_cat[ps$user_type == "occasional"] <- 1
ps$user_cat[ps$user_type == "daily"] <- 2
ps$user_cat <- factor(ps$user_cat,
levels = c(0,1,2),
labels = c("non-user",
"occasional",
"daily"))
ps$tp <- NA
ps$tp[ps$time == "pre2"] <- 0
ps$tp[ps$time == "post"] <- 1
ps$tp <- factor(ps$tp,
levels = c(0,1),
labels = c("pre2", "post"))
# subject level data
pt.df <- unique(ps[, c("subject_id", "tp", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "thc")])
There are more subjects in total than by time point. Indicating incomplete data with some subjects missing “pre” and others missing “post” measurement.
Add scalar features for each participant-time point
scalar.feat <- read.csv(file.path(data_folder, ps_folder, "scalars_trim.csv"),
header = T, stringsAsFactors = F)
scalar.feat$subject_id <- substr(scalar.feat$timeid, 1, 7)
scalar.feat$tp <- substr(scalar.feat$timeid, 8, 11)
scalar.featR <- scalar.feat[scalar.feat$eye == "Right", ]
pt.df <- merge(pt.df, scalar.featR[, c("subject_id", "tp", "min_constriction",
"duration", "avg_end_percent", "slope",
"AUC", "eye")],
by = c("subject_id", "tp"))
Reshape participant demog data to preserve pre and post THC levels and scalar features
pt.df.w <- reshape(pt.df,
timevar = "tp",
idvar = c("subject_id", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "eye"),
direction = "wide")
pt.df.w$thc.post[pt.df.w$user_cat == "non-user" & is.na(pt.df.w$thc.post)] <- 0
pt.df.w$thc_chg <- pt.df.w$thc.post - pt.df.w$thc.pre2
pt.df.w$BMI <- pt.df.w$demo_weight*703/(pt.df.w$demo_height)^2
pt.df.w$min_constriction_chg <- pt.df.w$min_constriction.post - pt.df.w$min_constriction.pre2
pt.df.w$AUC_chg <- pt.df.w$AUC.post - pt.df.w$AUC.pre2
pt.df.w$duration_chg <- pt.df.w$duration.post - pt.df.w$duration.pre2
pt.df.w$avg_end_percent_chg <- pt.df.w$avg_end_percent.post - pt.df.w$avg_end_percent.pre2
pt.df.w$slope_chg <- pt.df.w$slope.post - pt.df.w$slope.pre2
## Need to work on; time formatted in Excel file.
testTimes <- read.xlsx(file.path(data_folder, ps_folder, "All SafetyScan Times_23Aug2022_revSG.xlsx"),
sheet = "Raw")
testTimes$pre_safetyscan_date_convert <- as.Date(testTimes$pre_safetyscan_date,
origin = "1899-12-30")
testTimes$pre_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
" ", testTimes$pre_safetyscan_time_hr,
":", testTimes$pre_safetyscan_time_min,
":", "00"),
format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_start_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
" ", testTimes$consump_start_time_hr,
":", testTimes$consump_start_time_min,
":", "00"),
format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_end_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
" ", testTimes$consump_end_time_hr,
":", testTimes$consump_end_time_min,
":", "00"),
format = "%Y-%m-%d %H:%M:%S")
testTimes$post_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert,
" ", testTimes$post_safetyscan_time_hr,
":", testTimes$post_safetyscan_time_min,
":", "00"),
format = "%Y-%m-%d %H:%M:%S")
testTimes$postConsumpTimeToTest <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$consump_end_time_convert,
units = "mins"))
testTimes$Post_PreTime <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$pre_safetyscan_time_convert,
units = "mins"))
testTimes$remove <- ifelse(testTimes$subject_id == "001-056" & is.na(testTimes$postConsumpTimeToTest), 1, 0)
testTimes <- testTimes[testTimes$remove == 0, ]
# ## For subjects without a post Consumption to test time, we're using the time between pre and post test -- DO NOT "impute" this way -- JW 11/11/2022
# testTimes$postConsumpTimeToTest[is.na(testTimes$postConsumpTimeToTest)] <- testTimes$Post_PreTime[is.na(testTimes$postConsumpTimeToTest)]
pt.df <- merge(pt.df.w, testTimes[, c("subject_id", "postConsumpTimeToTest", "Post_PreTime")],
by = "subject_id")
ps <- merge(ps, testTimes[, c("subject_id", "postConsumpTimeToTest", "Post_PreTime")],
by = "subject_id")
non_user.id <- pt.df$subject_id[pt.df$user_cat == "non-user"]
vs <- read.xlsx(file.path(data_folder, ps_folder,
"All CDPHE VAS Scores_30Nov2022.xlsx"),
sheet = "VAS")
### Data Dictionary
# "subject_id"
# "vas0_high_score" -- vas score for how high you feel at pre
# "vas0_score_dc" -- how confident you feel about driving at pre
# "vas1_high_score" -- vas score for how high you feel at post
# "vas1_score_dc" -- how confident you feel about driving at post
pt.df <- merge (pt.df, vs,
by = "subject_id",
all.x = T)
pt.df$vas_high_score_chg <- pt.df$vas1_high_score - pt.df$vas0_high_score
pt.df$vas_score_dc_chg <- pt.df$vas1_score_dc - pt.df$vas0_score_dc
Remove Outliers
## Remove known outliers
ps$outliers <- 0
# ps$outliers[ps$subject_id == "001-109" & ps$tp == "pre2"] <- 1 # Ben revised
ps$outliers[ps$subject_id == "001-060" & ps$tp == "post"] <- 1
ps <- ps[ps$outliers == 0, ]
Plot of PRE/POST for Right Eye
right.df <- ps[ps$eye == "Right", ]
ggplot(right.df, aes(x=frame/30, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title = "Percent Change by Pre/Post and THC use category",
x = "seconds")+
theme_bw()
Make sure datasets have the same participants and are truncated at frame 400
ps <- ps[ps$frame >= 0 & ps$frame <= 400,
c("subject_id", "tp", "eye", "frame", "percent_change")]
ps <- ps[order(ps$subject_id, ps$tp, ps$eye, ps$frame), ]
ps$frame_char <- str_pad(ps$frame, width = 3, side = c("left"), pad = "0")
ps$frame <- NULL
ps.w <- reshape(ps[, c("subject_id","tp", "eye", "frame_char", "percent_change")],
timevar = "frame_char",
idvar = c("subject_id", "tp", "eye"),
direction = "wide")
var.names <- c(names(ps.w)[1:3], sort(names(ps.w)[4:404]))
ps.w <- ps.w[, var.names]
id.names <- paste(ps.w$subject_id, ps.w$tp, ps.w$eye, sep = "_")
rownames(ps.w) <- id.names
missData <- apply(ps.w[, 4:404], MARGIN = 1, FUN = function(x) sum(is.na(x)))
missInterpolate <- function(x){
z <- zoo(x, order.by = seq(0:400))
y <- na.approx(z, maxgap = 3, na.rm = FALSE)
return(as.numeric(y))
}
test <- apply(ps.w[, 4:404],
MARGIN = 1,
FUN = missInterpolate)
ps.w <- data.frame(t(test))
names(ps.w) <- var.names[4:404]
ps.w$subject_id <- substr(rownames(ps.w), 1, 7)
ps.w$tp <- ifelse(grepl("pre2", rownames(ps.w)) == T, "pre2", "post")
ps.w$eye <- ifelse(grepl("Left", rownames(ps.w)) == T, "Left", "Right")
ps.w <- ps.w[, var.names]
pctChg.names <- var.names[4:404]
ps <- reshape(ps.w,
varying = pctChg.names,
v.names = "percent_change",
timevar = "frame",
times = 0:400,
idvar = c("subject_id", "tp", "eye"),
direction = "long")
ps <- ps[order(ps$subject_id, ps$tp, ps$eye), ]
right_0.post <- ps[ps$tp == "post" & ps$eye == "Right", ]
post.ids <- unique(right_0.post$subject_id)
right_0.post.w <- ps.w[ps.w$tp == "post" & ps.w$eye == "Right", ]
right_0.pt <- reshape(ps[ps$eye == "Right", ],
timevar = "tp",
idvar = c("subject_id", "frame"),
direction = "wide")
right_0.pt$wiPctChg <- right_0.pt$percent_change.post - right_0.pt$percent_change.pre2
right_0.pt <- right_0.pt[, c("subject_id", "frame", "wiPctChg")]
right_0.pt.w <- reshape(right_0.pt[, c("subject_id", "frame", "wiPctChg")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(right_0.pt.w[, 2:402]))
rowsAllMissing <- names(allMissing)[allMissing == 401]
right_0.pt.w <- right_0.pt.w[!(rownames(right_0.pt.w) %in% rowsAllMissing), ]
################################# ----------------------------- Left eye
left_0.post <- ps[ps$tp == "post" & ps$eye == "Left", ]
left.post.ids <- unique(left_0.post$subject_id)
left_0.post.w <- ps.w[ps.w$tp == "post" & ps.w$eye == "Left", ]
left_0.pt <- reshape(ps[ps$eye == "Left", ],
timevar = "tp",
idvar = c("subject_id", "frame"),
direction = "wide")
left_0.pt$wiPctChg <- left_0.pt$percent_change.post - left_0.pt$percent_change.pre2
left_0.pt <- left_0.pt[, c("subject_id", "frame", "wiPctChg")]
left_0.pt.w <- reshape(left_0.pt[, c("subject_id", "frame", "wiPctChg")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(left_0.pt.w[, 2:402]))
rowsAllMissing <- names(allMissing)[allMissing == 401]
left_0.pt.w <- left_0.pt.w[!(rownames(left_0.pt.w) %in% rowsAllMissing), ]
## Find intersection of all participant files across data sets to define the analytic subjects
analytic.N <- intersect(unique(right_0.post$subject_id), unique(right_0.pt$subject_id))
analytic.N <- intersect(analytic.N, unique(right_0.post.w$subject_id))
analytic.N <- intersect(analytic.N, unique(right_0.pt.w$subject_id))
analytic_L.N <- intersect(unique(left_0.post.w$subject_id), unique(left_0.post$subject_id))
analytic_L.N <- intersect(analytic.N, unique(left_0.pt$subject_id))
analytic_L.N <- intersect(analytic.N, unique(left_0.pt.w$subject_id))
analytic_LR.N <- intersect(analytic.N, analytic_L.N)
## Filter all datasets to analytic sample
left_0.post <- left_0.post[left_0.post$subject_id %in% analytic_LR.N, ]
left_0.post$seconds <- left_0.post$frame/30
left_0.post.w <- left_0.post.w[left_0.post.w$subject_id %in% analytic_LR.N ,]
row.names(left_0.post.w) <- left_0.post.w$subject_id
left_0.post <- merge(left_0.post, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
left_0.post.w <- merge(left_0.post.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(left_0.post.w) <- left_0.post.w$subject_id
left_0.pt <- left_0.pt[left_0.pt$subject_id %in% analytic_LR.N, ]
left_0.pt$seconds <- left_0.pt$frame/30
left_0.pt.w <- left_0.pt.w[left_0.pt.w$subject_id %in% analytic_LR.N, ]
row.names(left_0.pt.w) <- left_0.pt.w$subject_id
left_0.pt <- merge(left_0.pt, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
left_0.pt.w <- merge(left_0.pt.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(left_0.pt.w) <- left_0.pt.w$subject_id
## Filter all datasets to analytic sample
right_0.post <- right_0.post[right_0.post$subject_id %in% analytic.N, ]
right_0.post$seconds <- right_0.post$frame/30
right_0.post.w <- right_0.post.w[right_0.post.w$subject_id %in% analytic.N ,]
row.names(right_0.post.w) <- right_0.post.w$subject_id
right_0.post <- merge(right_0.post, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
right_0.post.w <- merge(right_0.post.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(right_0.post.w) <- right_0.post.w$subject_id
right_0.pt <- right_0.pt[right_0.pt$subject_id %in% analytic.N, ]
right_0.pt$seconds <- right_0.pt$frame/30
right_0.pt.w <- right_0.pt.w[right_0.pt.w$subject_id %in% analytic.N, ]
right_0.pt <- merge(right_0.pt, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
right_0.pt.w <- merge(right_0.pt.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(right_0.pt.w) <- right_0.pt.w$subject_id
pt.analytic.df <- pt.df[pt.df$subject_id %in% analytic.N,]
post.right.ids <- unique(ps$subject_id[ps$tp == "post" & ps$eye == "Right"])
pre.right.ids <- unique(ps$subject_id[ps$tp == "pre2" & ps$eye == "Right"])
post.exclude.ids <- post.right.ids[!(post.right.ids %in% pre.right.ids)]
# ### Write Analytic Files
#
# write.csv(pt.analytic.df,
# file.path(data_folder, "PupilLightResponse_AnalyticSample_Demog.csv"),
# row.names = F)
#
# write.csv(right_0.post.w,
# file.path(data_folder, "PupilLightResponse_RightEye_Post_Wide.csv"),
# row.names = F)
#
# write.csv(right_0.post,
# file.path(data_folder, "PupilLightResponse_RightEye_Post_Long.csv"),
# row.names = F)
for(i in post.exclude.ids){
print(ggplot(ps[ps$subject_id == i & ps$tp == "post" & ps$eye == "Right", ],
aes(x = frame, y = percent_change))+
geom_line())
}
#Figure Typical Pupil Light Response
# for(i in non_user.id){
# print(ggplot(right_0.post[right_0.post$subject_id == i, ],
# aes(x = frame, y = percent_change))+
# labs(x = i)+
# geom_line())
# }
typical.df <- right_0.post[right_0.post$subject_id == "001-032", ]
typical.df$seconds <- typical.df$frame/30
typical_1 <- typical.df$percent_change[typical.df$seconds == 1]
typical_2 <- typical.df$percent_change[typical.df$seconds == 2]
typical_5 <- typical.df$percent_change[typical.df$seconds == 5]
min.constrict.x <- typical.df$seconds[typical.df$percent_change == min(typical.df$percent_change, na.rm = T)][1]
min.constrict.y <- min(typical.df$percent_change, na.rm = T)
typical.df$minConstrict <- ifelse(typical.df$seconds == min.constrict.x &
typical.df$percent_change == min.constrict.y,
"Point of \nminimal \nconstriction", "")
typical.df$minConstrict[typical.df$seconds == 2] <- "Percent \nchange \nat 2 seconds"
typical.df$minConstrict[typical.df$seconds == 5] <- "Percent \nchange \nat 5 seconds"
ggplot(typical.df, aes(x = seconds, y = percent_change))+
geom_line()+
geom_ribbon(data = typical.df[typical.df$seconds >= min.constrict.x, ],
aes(ymin = 0, ymax = percent_change), fill= "lightblue")+
# geom_text(hjust = 0.001)+
labs(y = "Percent change in pupil response",
x = "Seconds from start of light test")+
scale_x_continuous(expand = c(0,0), limits = c(0, 10),
breaks = c(0,2,4,6,8,10))+
scale_y_continuous(expand = c(0,0), limits = c(-30, 1))+
# annotate("rect", xmin = min.constrict.x, xmax = 10,
# ymin = min.constrict.y-2, ymax = 1,
# alpha = 0.5, fill = "lightblue")+
geom_point(x = min.constrict.x,
y = min.constrict.y,
colour = "darkblue",
size = 2)+
geom_point(x = 5,
y = typical_5,
colour = "violet",
size = 2)+
geom_point(x = 2,
y = typical_2,
colour = "violet",
size = 2)+
geom_vline(xintercept = min.constrict.x, color = "darkblue", linetype ="dotted")+
# annotate("text", x =min.constrict.x-0.25,
# y = min.constrict.y-0.5,
# label = "Point of \nminimal \nconstriction",
# hjust= 1)+
geom_text_repel(aes(label = minConstrict),
force = 50,
max.overlaps = 50,
nudge_x = 0.1)+
annotate("text", x = 6.25, y = -1,
label = "Rebound dilation")
No Consumption end time recorded for non-users
pt.df$postConsumpTimeToTest[pt.df$user_cat == "non-user"] <- 0
tab1 <- pt.df[pt.df$subject_id %in% analytic.N, c("user_cat", "demo_age", "demo_gender", "BMI", "thc.post", "postConsumpTimeToTest")]
tab1$demo_gender <- as.character(tab1$demo_gender)
table1.df <- tbl_summary(tab1,
by = user_cat,
statistic = list(
demo_age ~ "{mean} ({sd})",
BMI ~ "{mean} ({sd})",
thc.post ~ "{median} ({p25}, {p75})",
postConsumpTimeToTest ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"
),
label = list(
demo_age ~ "Age (years)",
demo_gender ~ "Sex",
BMI ~ "Body Mass Index (kg/m^2)",
thc.post ~ "THC, after cannabis smoking (ng/ml)",
postConsumpTimeToTest ~ "Time Interval of pupillary measurements after initiation of cannabis smoking (mins)"
),
digits = all_continuous() ~ 2,
missing = "no") %>%
modify_header(all_stat_cols() ~ "**{level}**\n(N = {n})") %>%
add_overall(last = TRUE, col_label = "**Total**\n(N = {N})") %>%
add_p() %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Cannabis Use Group**") %>%
bold_labels()
table1.df
| Characteristic | Cannabis Use Group | Overall, N = 841 | p-value2 | ||
|---|---|---|---|---|---|
| non-user (N = 29)1 | occasional (N = 30)1 | daily (N = 25)1 | |||
| Age (years) | 32.29 (4.70) | 31.15 (4.75) | 32.75 (5.71) | 32.02 (5.02) | 0.5 |
| Sex | 0.2 | ||||
| Female | 16 (55%) | 10 (33%) | 9 (36%) | 35 (42%) | |
| Male | 13 (45%) | 20 (67%) | 16 (64%) | 49 (58%) | |
| Body Mass Index (kg/m^2) | 24.94 (4.72) | 24.49 (3.96) | 27.08 (4.26) | 25.42 (4.41) | 0.066 |
| THC, after cannabis smoking (ng/ml) | 0.00 (0.00, 0.00) | 5.73 (3.73, 9.47) | 17.84 (8.20, 42.42) | 3.96 (0.00, 13.60) | <0.001 |
| Time Interval of pupillary measurements after initiation of cannabis smoking (mins) | 0.00 (0.00) | 63.93 (6.26) | 60.16 (3.78) | 40.74 (30.10) | <0.001 |
| 1 Mean (SD); n (%); Median (IQR) | |||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||
# table1.df %>% as_flex_table() %>% flextable::save_as_docx(path = "../results/tabl1.docx")
# tab1_latex <- table1.df %>% as_gt() %>% gt::as_latex()
# table1.df %>% as_gt() %>% gt::gtsave(filename = "../results/tabl1.png")
pt.df$postConsumpTimeToTest[pt.df$user_cat == "non-user"] <- NA
# boxplot(demo_age ~ user_cat, data = tab1)
# m1 <- lm(log(demo_age) ~ user_cat, data = tab1)
# summary(m1)
# hist(resid(m1))
#
# boxplot(BMI ~ user_cat, data = tab1)
#
# m2 <- lm(BMI ~ user_cat, data = tab1)
# summary(m2)
# hist(resid(m2), breaks = 15)
#
# plot(fitted(m2), resid(m2))
boxplot(pt.df$postConsumpTimeToTest)
hist(pt.df$postConsumpTimeToTest)
summary(pt.df$postConsumpTimeToTest)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 53.00 59.00 62.00 62.74 66.00 84.00 32
post.user <- ggplot(right_0.post, aes(x=seconds, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat))+
labs(title ="POST data percent change by THC use category")+
theme_bw()
post.user
Another method that can be used to predict smoking status is a scalar-on-function regression model which uses differences between the whole trajectories to determine smoking status. However, when there are missing values (due to different test durations), all trajectories should be truncated to the shortest or the values should be imputed with fPCA. (See exploration of using SoFR line starting at 3000)
sofr_impute_df <- right_0.post.w[, pctChg_vars]
rownames(sofr_impute_df) <- rownames(right_0.post.w)
sofr_fpca <- fpca.face(as.matrix(sofr_impute_df))
sofr_fpca2 <- sofr_fpca$Yhat
names(sofr_fpca2) <- names(sofr_impute_df)
sofr_imputed <- sofr_impute_df
sofr_imputed[is.na(sofr_imputed)] <- sofr_fpca2[is.na(sofr_imputed)]
ncols = ncol(sofr_imputed)
smk.df <- pt.analytic.df[, c("subject_id", "user_cat")]
smk.df$smoker <- ifelse(pt.analytic.df$user_cat %in% c("daily", "occasional"), 1, 0)
sind <- seq(0, 1, len = ncols)
smat <- matrix(sind, nrow(smk.df), ncols, byrow = T)
# smk.df$Zsm <- I(Zsm)
smk.df$Zraw <- I(as.matrix(sofr_imputed))
smk.df$smat <- I(smat)
smk.df$lmat <- I(matrix(1/ncols, nrow(smk.df), ncols))
smk.df$zlmat <- I(smk.df$lmat * smk.df$Zraw)
smk_fglm_ps2 <- gam(smoker ~ s(smat, by=zlmat, bs = "cr", k = 30), data=smk.df,
method = "REML", family = binomial)
summary(smk_fglm_ps2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## smoker ~ s(smat, by = zlmat, bs = "cr", k = 30)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3731 0.5711 2.404 0.0162 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(smat):zlmat 4.79 5.463 10.78 0.105
##
## R-sq.(adj) = 0.117 Deviance explained = 13.1%
## -REML = 52.945 Scale est. = 1 n = 84
pt.sofr.roc <- roc(smk_fglm_ps2$model$smoker, smk_fglm_ps2$fitted.values)
pt.sofr.roc.auc <- auc(pt.sofr.roc)
auc.df <- data.frame("Label" = c("Post AUC Scalar Feat", "Post SoFR AUC"),
"AUC" = c(post.auc.scalar, pt.sofr.roc.auc),
stringsAsFactors = F)
sofr_auc <- data.frame("subject_id" = smk.df$subject_id,
"smoker" = smk.df$smoker,
"post_sofr_scores" = smk_fglm_ps2$fitted.values)
auc_inference <- merge(auc_inference, sofr_auc,
by = c("subject_id", "smoker"))
df_sofr <- data.frame("smat" = seq(0, 1, length.out = ncol(smk.df$smat)),
"zlmat" = 1)
sofr_pred <- predict(smk_fglm_ps2, newdata = df_sofr, se.fit = TRUE, type = "iterms")
# sofr_pred2 <- predict(smk_fglm_ps2, newdata = df_sofr, se.fit = TRUE, type = "terms")
plot.sofr <- data.frame("frame" = 0:(ncol(smk.df$smat) - 1),
"f0" = sofr_pred$fit[, 1],
'f0.se' = sofr_pred$se.fit[, 1])
plot.sofr$lci <- exp(plot.sofr$f0 - 1.96*plot.sofr$f0.se)
plot.sofr$uci <- exp(plot.sofr$f0 + 1.96*plot.sofr$f0.se)
plot.sofr$OR <- exp(plot.sofr$f0)
plot.sofr$sec <- plot.sofr$frame/30
plot.sofr$p <- exp(plot.sofr$f0)/(1+exp(plot.sofr$f0))
sigRegion <- function(df, crit.val, imp.var){
time <- imp.var[1]
est <- imp.var[2]
lci <- imp.var[3]
uci <- imp.var[4]
crit.rows <- which((df[, lci] < crit.val & df[, uci] < crit.val) |
(df[, lci] > crit.val & df[, uci] > crit.val))
# print(crit.rows)
group <- vector(mode = "numeric", length = length(crit.rows))
grp.ind <- 1
for(i in 1:length(crit.rows)){
if(i == 1){
group[i] <- grp.ind}
else(if(i > 1){
if(crit.rows[i] - crit.rows[i-1] == 1){
group[i] <- grp.ind
}
else(if(crit.rows[i] - crit.rows[i-1] != 1){
grp.ind <- grp.ind+1
group[i] <- grp.ind
})
})
}
# print(group)
num.grp <- unique(group)
ind.df <- data.frame(group = NA,
start = NA,
end = NA)
for(i in num.grp){
ind.df[i,] <- c(i, min(crit.rows[group == i]), max(crit.rows[group == i]))
}
# print(ind.df)
res.df <- data.frame(int.start = NA,
int.end = NA,
est.time = NA,
est = NA,
lci = NA,
uci = NA)
# print(nrow(ind.df))
for(i in 1:nrow(ind.df)){
sub.df <- df[(ind.df$start[i]):(ind.df$end[i]), ]
# print(sub.df[1, ])
if(sub.df[1, lci] > crit.val & sub.df[1, uci] >crit.val){
start1 <- round(sub.df[1, time], 2)
end1 <- round(sub.df[nrow(sub.df), time], 2)
or1 <- max(sub.df[ , est])
peaktime1 <- round(sub.df[sub.df[, est] == or1, time], 2)
lci1 <- round(sub.df[sub.df[, est] == or1, lci], 2)
uci1 <- round(sub.df[sub.df[, est] == or1, uci], 2)
or1 <- round(or1, 2)
res.df[i, ] <- c(start1, end1, peaktime1, or1, lci1, uci1)
# print(res.df)
}
if(sub.df[1, lci] < crit.val & sub.df[1, uci] < crit.val){
start2 <- round(sub.df[1, time], 2)
end2 <- round(sub.df[nrow(sub.df), time], 2)
or2 <- min(sub.df[, est])
peaktime2 <- round(sub.df[sub.df[, est] == or2, time], 2)
lci2 <- round(sub.df[sub.df[, est] == or2, lci], 2)
uci2 <- round(sub.df[sub.df[, est] == or2, uci], 2)
or2 <- round(or2, 2)
res.df[i, ] <- c(start2, end2, peaktime2, or2, lci2, uci2)
# print(res.df)
}
}
# crit.ind <- by(crit.rows, INDICES = list(group), FUN = function(x) c(min(x), max(x)))
return(res.df)
}
sofr.sigRegions <- sigRegion(plot.sofr, 1, imp.var = c("sec", "OR", "lci", "uci"))
sofr_plot <- ggplot(data = plot.sofr, aes(x=frame/30, y = exp(f0)))+
geom_line()+
geom_line(aes(x=frame/30, y = exp(f0-1.96*f0.se)), linetype = 'longdash')+
geom_line(aes(x=frame/30, y = exp(f0+1.96*f0.se)), linetype = 'longdash')+
geom_segment(data=sofr.sigRegions, aes(x = int.start, y=1, xend= int.end, yend = 1),
color = "darkred", linewidth =1.2)+
# geom_hline(yintercept = 1, color = "darkred", linetype = 1) +
scale_x_continuous(expand = c(0, 0), limits = c(0,10),
breaks = c(0,2,4,6,8,10))+
scale_y_continuous(limits = c(0,8),
breaks = c(0,1,2,3,4,6,8)) +
labs(title = "",
x = "Seconds after start of light test",
y = "Odds ratio between smokers and non-smokers")+
theme_bw()
# sofr_plot_prob <- ggplot(data = plot.sofr, aes(x=frame/30, y = p))+
# geom_line()+
# geom_line(aes(x=frame/30, y = exp(f0-1.96*f0.se)/(1+exp(f0-1.96*f0.se))), linetype = 'longdash')+
# geom_line(aes(x=frame/30, y = exp(f0+1.96*f0.se)/(1+exp(f0+1.96*f0.se))), linetype = 'longdash')+
# geom_segment(data=sofr.sigRegions, aes(x = int.start, y=0.50, xend= int.end, yend = 0.5),
# color = "darkred", linetype = "dotted", linewidth =1.2)+
# # geom_hline(yintercept = 1, color = "darkred", linetype = 1) +
# scale_x_continuous(expand = c(0, 0), limits = c(0,10))+
# ylim(0,1) +
# labs(title = "",
# x = "Seconds after start of light test",
# y = "Probability of being a smoker")+
# theme_bw()
sofr_plot
# "Prediction of Cannabis Use with Single Summary Features and Full Trajectory Model after Cannabis Consumption"
# jpeg(file.path(res_folder, "AUC_4PC_ScalarFeatures.jpg"),
# height = 6, width = 8, res = 300, units = "in")
roc.col <- viridis(5)
# plot.roc(pt.scalar.roc, main = "",
# col = roc.col[2])
# plot.roc(pt.sofr.roc, main = "", add = T, col = roc.col[5])
# legend(0.7, 0.1, legend = c(paste0("Summary Features AUC: ",round(post.auc.scalar, 3)),
# paste0("Full Trajectory AUC: ", round(pt.sofr.roc.auc, 3))),
# col = roc.col[c(2,5)],
# lty = rep(1, 2), cex = 0.8, bty="n")
auc_inference.l <- reshape(auc_inference,
varying = c("post_SF_aucScores", "post_sofr_scores"),
v.names = "prediction",
timevar = "model",
times = c("post_SF_aucScores", "post_sofr_scores"),
direction = "long")
auc_inference.l$modelName <- ifelse(auc_inference.l$model == "post_SF_aucScores",
paste0("Traditional LogRegr AUC: ",round(post.auc.scalar, 3)),
paste0("Functional LogRegr AUC: ", round(pt.sofr.roc.auc, 3)))
plot.roc <- ggplot(data = auc_inference.l, aes(d = smoker, m = prediction, color = modelName))+
geom_roc(n.cuts=0)+
labs(x = "1-Specificity",
y = "Sensitivity",
color = "")+
scale_color_manual(values = roc.col[c(5,2)]) +
theme_bw()+
theme(legend.position = c(0.6,0.2))
plot.roc
final.roc <- grid.arrange(plot.roc, sofr_plot,
layout_matrix = cbind(1,2))
labeled.roc.plot <- as_ggplot(final.roc) +
draw_plot_label(label = c("A", "B"), size = 15,
x = c(0, 0.5), y = c(1, 1))
labeled.roc.plot
auc.df$AUC <- round(as.numeric(auc.df$AUC), 3)
knitr::kable(auc.df)
| Label | AUC |
|---|---|
| Post AUC Scalar Feat | 0.675 |
| Post SoFR AUC | 0.713 |
AUC Inference
Vr_10 <- function(nN, srpi, srnj){
Vr_10i <- vector(mode = "numeric", length = length(srpi))
for(i in 1:length(srpi)){
Vr_10i[i] = (1/nN)*(sum(srpi[i] > srnj)+ 0.5*sum(srpi[i] == srnj))
}
return(Vr_10i)
}
Vr_01 <- function(pN, srpi, srnj){
Vr_01i <- vector(mode = "numeric", length = length(srnj))
for(i in 1:length(srnj)){
Vr_01i[i] = (1/pN)*(sum(srnj[i] < srpi)+ 0.5*sum(srnj[i] == srpi))
}
return(Vr_01i)
}
w10 <- function(nP, v10_r, auc_r, v10_s, auc_s){
diff_r <- vector(mode = "numeric", length = length(v10_r))
diff_s <- vector(mode = "numeric", length = length(v10_s))
for(i in 1:length(v10_r)){
diff_r[i] <- v10_r[i] - auc_r
diff_s[i] <- v10_s[i] - auc_r
}
res <- (nP-1)^(-1)* sum(diff_r * diff_s)
return(res)
}
w01 <- function(nN, v01_r, auc_r, v01_s, auc_s){
diff_r <- vector(mode = "numeric", length = length(v01_r))
diff_s <- vector(mode = "numeric", length = length(v01_s))
for(i in 1:length(v01_r)){
diff_r[i] <- v01_r[i] - auc_r
diff_s[i] <- v01_s[i] - auc_r
}
res <- (nN-1)^(-1)* sum(diff_r * diff_s)
return(res)
}
w_rs <- function(nP, nN, srpi_r, srnj_r, srpi_s, srnj_s, auc_r, auc_s){
v10_r <- Vr_10(nN, srpi_r, srnj_r)
v01_r <- Vr_01(pN=nP, srpi_r, srnj_r)
v10_s <- Vr_10(nN, srpi_s, srnj_s)
v01_s <- Vr_01(pN=nP, srpi_s, srnj_s)
w10_rs <- w10(nP, v10_r, auc_r, v10_s, auc_s)
w01_rs <- w01(nN, v01_r, auc_r, v01_s, auc_s)
res <- (nP)^(-1)*w10_rs + (nN)^(-1)*w01_rs
return(res)
}
w_postSF_postSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])
w_postSOFR_postSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$post_sofr_scores[auc_inference$smoker == 1],
srnj_s = auc_inference$post_sofr_scores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
auc_s = auc.df$AUC[auc.df$Label == "Post AUC SoFR"])
w_postSOFR_postSOFR <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])
z_postSF_postSOFR <- (auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"] - auc.df$AUC[auc.df$Label == "Post SoFR AUC"])/
(sqrt(w_postSF_postSF + w_postSOFR_postSOFR - w_postSOFR_postSF))
auc_compare <- data.frame("Test Desciption" = "AUC-postSF v AUC-postSoFR",
"z" = round(z_postSF_postSOFR, 3))
auc_compare$p <- round((1-pnorm(abs(auc_compare$z)))*2, 3)
knitr::kable(auc_compare)
| Test.Desciption | z | p |
|---|---|---|
| AUC-postSF v AUC-postSoFR | -0.53 | 0.596 |
\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t)*I(user = occasional) + f_2(t)*I(user = daily) + b_i(t) + \epsilon_i(t)\\ & b_i(t) \sim GP(0, \Sigma_b) \\ & \epsilon_i(t) \sim N(0, \sigma_{\epsilon}^2) \\ &= \sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)*I(user = occasional) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i)*I(user = daily) + \sum_{k=1}^K \xi_{ik}\phi_k(t_i) + \epsilon_i(t)\\ \end{aligned} \]
\[ \begin{aligned} Y_{i}(t) &= f_0(t)\\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) \\ &= \Phi_0\xi_0 \end{aligned} \]
\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)\\ &= \Phi_0\xi_0 + \Phi_1\xi_1\\ &= [ \Phi_0, \Phi_1 ] \xi \end{aligned} \]
\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_2(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i) \\ &= \Phi_0\xi_0 + \Phi_2\xi_2\\ &= [ \Phi_0, \Phi_2 ] \xi \end{aligned} \]
right_0.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
right_0.gam.df$sind <- right_0.gam.df$frame/400
m.post_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr"),
data = right_0.gam.df, method = "REML")
## Create a matrix of residuals
post_gam.resid <- cbind(right_0.gam.df[!(is.na(right_0.gam.df$percent_change)),
c("subject_id", "frame")],
m.post_gam$residuals)
names(post_gam.resid) <- c("subject_id", "frame", "resid")
resid_mat <- reshape(post_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
resid_mat <- as.matrix(resid_mat)
post_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
right_0.gam.df <- merge(right_0.gam.df, Phi_mat,
by = "frame",
all.x = T)
right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
right_0.gam.df$subject_id <- as.factor(right_0.gam.df$subject_id)
post_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = right_0.gam.df, discrete = TRUE)
summary(post_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") +
## s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2,
## bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id,
## by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3125 0.9197 -11.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.54 28.43 115.67 <2e-16 ***
## s(sind):occasional 18.63 21.95 70.14 <2e-16 ***
## s(sind):daily 18.62 21.95 35.42 <2e-16 ***
## s(subject_id):Phi1 82.23 84.00 23472.92 <2e-16 ***
## s(subject_id):Phi2 81.68 84.00 2775.02 <2e-16 ***
## s(subject_id):Phi3 82.23 84.00 887.87 <2e-16 ***
## s(subject_id):Phi4 81.05 84.00 1139.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.966 Deviance explained = 96.6%
## fREML = 63334 Scale est. = 2.6314 n = 32642
post_gam.coef <- post_gam.fri$coefficients
post_gam.cov <- vcov.gam(post_gam.fri)
df_pred_non <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_non <- predict(post_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_occ <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 1, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_occ <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "lpmatrix")
df_pred_dly <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_dly <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
pred_df_user <- data.frame(seconds = rep(seq(0, 400), 3)/30,
user = c(rep("no use", 401), rep("occasional use", 401),rep("daily use", 401)),
mean = c(lpmat_non %*% post_gam.coef,
lpmat_occ %*% post_gam.coef, lpmat_dly %*% post_gam.coef),
se = c(sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_occ %*% post_gam.cov %*% t(lpmat_occ))),
sqrt(diag(lpmat_dly %*% post_gam.cov %*% t(lpmat_dly)))),
stringsAsFactors = F)
post_userProfile <- ggplot(data = pred_df_user, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
labs(y = "Percent change in pupil response",
x = "Seconds from start of light test",
colour = "Cannabis Use")+
theme_bw()
post_userProfile
legend_userProfile <- g_legend(post_userProfile)
pred_f1 <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "terms")
pred_f2 <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
f0_hat = lpmat_non %*% post_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2],
f2_hat = pred_f2$fit[, 3],
f2_se = pred_f2$se.fit[, 3])
pred_coef_df$f1_lci <- pred_coef_df$f1_hat - 2*pred_coef_df$f1_se
pred_coef_df$f1_uci <- pred_coef_df$f1_hat + 2*pred_coef_df$f1_se
pred_coef_df$f2_lci <- pred_coef_df$f2_hat - 2*pred_coef_df$f2_se
pred_coef_df$f2_uci <- pred_coef_df$f2_hat + 2*pred_coef_df$f2_se
occ_non.sigRegion <- sigRegion(pred_coef_df, crit.val = 0, imp.var = c("seconds", "f1_hat", "f1_lci", "f1_uci"))
dly_non.sigRegion <- sigRegion(pred_coef_df, crit.val = 0, imp.var = c("seconds", "f2_hat", "f2_lci", "f2_uci"))
post_non_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f0_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f0_hat + 2*f0_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f0_hat - 2*f0_se), linetype = "longdash")+
scale_x_continuous(expand = c(0, 0), limits = c(0,10))+
scale_y_continuous(limits = c(-7, 7))+
labs(title = "Average Percent Change of a Non-user (Post)", ylab = "Percent Change")+
theme_bw()
post_occ_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
# geom_line(aes(x = seconds, y = 0), col = "darkred")+
geom_segment(data=occ_non.sigRegion, aes(x = int.start, y=0, xend= int.end, yend = 0),
color = "darkred", linewidth =1.2)+
labs(title = "Occasional use vs No use",
y= "",
x = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0), limits = c(0,10),
breaks = c(0,2,4,6, 8, 10))+
scale_y_continuous(limits = c(-7, 7), breaks = c(-6,-4,-2,0,2,4,6))+
theme(rect = element_rect(fill = "transparent"))
post_dly_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f2_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f2_hat + 2*f2_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2_hat - 2*f2_se), linetype = "longdash")+
# geom_line(aes(x = seconds, y = 0), col = "darkred")+
geom_segment(data=dly_non.sigRegion, aes(x = int.start, y=0, xend= int.end, yend = 0),
color = "darkred", linewidth =1.2)+
labs(title = "Daily use vs No use",
y = "",
x = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0), limits = c(0,10),
breaks = c(0,2,4,6, 8, 10))+
scale_y_continuous(limits = c(-7, 7), breaks = c(-6,-4,-2,0,2,4,6))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in percent change in pupil response", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_occ_coef <- grid.arrange(ylab, posval, negval, post_occ_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
post_dly_coef <- grid.arrange(ylab, posval, negval, post_dly_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
Plot difference between daily and occasional user
f2_f1 <- (lpmat_dly - lpmat_occ) %*% post_gam.coef
f2_f1.se <- sqrt(diag((lpmat_dly - lpmat_occ) %*% post_gam.cov %*% t((lpmat_dly - lpmat_occ))))
f2_f1_diff_df <- data.frame(seconds = seq(0, 400)/30,
f2f1_diff = f2_f1,
f2f1_se = f2_f1.se)
post_dlyocc_plt <- ggplot(data = f2_f1_diff_df, aes(x=seconds, y = f2f1_diff))+
geom_line()+
geom_line(aes(x = seconds, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
# geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Daily use vs Occasional use",
y = "",
x = "Seconds from start of light test")+
theme_bw()+
scale_x_continuous(expand = c(0, 0), limits = c(0,10),
breaks = c(0,2,4,6, 8, 10))+
scale_y_continuous(limits = c(-7, 7), breaks = c(-6,-4,-2,0,2,4,6))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in percent change in pupil response", rot = 90,
gp = gpar(fontsize = 12))
posval_do <- textGrob(label = sapply(strwrap("Less Constriction in Daily User", width = 15,
simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval_do <- textGrob(label = sapply(strwrap("More Constriction in Daily User", width = 15,
simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_dlyocc_coef <- grid.arrange(ylab, posval_do, negval_do, post_dlyocc_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)),
c(1, 3, rep(4, 15))))
post_dlyocc_coef
## TableGrob (2 x 17) "arrange": 4 grobs
## z cells name grob
## 1 1 ( 1- 2, 1- 1) arrange text[GRID.text.983]
## 2 2 ( 1- 1, 2- 2) arrange text[GRID.text.984]
## 3 3 ( 2- 2, 2- 2) arrange text[GRID.text.985]
## 4 4 ( 1- 2, 3-17) arrange gtable[layout]
Panel Graphic of Differences
g1 <- grid.arrange(posval, negval, post_occ_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, rep(3, 9)),
c(2, rep(3, 9))))
g2 <- grid.arrange(posval, negval, post_dly_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, rep(3, 9)),
c(2, rep(3, 9))))
g3 <- grid.arrange(posval_do, negval_do, post_dlyocc_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, rep(3, 9)),
c(2, rep(3, 9))))
fosr_contrasts <- grid.arrange(ylab, g1, g2, g3,
ncol = 2, nrow = 18,
layout_matrix = rbind(c(1, rep(2, 15)),
c(1, rep(2, 15)),
c(1, rep(2, 15)),
c(1, rep(2, 15)),
c(1, rep(2, 15)),
c(1, rep(3, 15)),
c(1, rep(3, 15)),
c(1, rep(3, 15)),
c(1, rep(3, 15)),
c(1, rep(3, 15)),
c(1, rep(4, 15)),
c(1, rep(4, 15)),
c(1, rep(4, 15)),
c(1, rep(4, 15)),
c(1, rep(4, 15))))
fosr_contrasts
## TableGrob (15 x 16) "arrange": 4 grobs
## z cells name grob
## 1 1 ( 1-15, 1- 1) arrange text[GRID.text.983]
## 2 2 ( 1- 5, 2-16) arrange gtable[arrange]
## 3 3 ( 6-10, 2-16) arrange gtable[arrange]
## 4 4 (11-15, 2-16) arrange gtable[arrange]
smoker.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
smoker.gam.df$sind <- smoker.gam.df$frame/400
smoker.gam.df$smoker <- ifelse(smoker.gam.df$user_cat == "non-user", 0, 1)
m.post_smoker_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr"),
data = smoker.gam.df, method = "REML")
## Create a matrix of residuals
post_smoker_gam.resid <- cbind(smoker.gam.df[!(is.na(smoker.gam.df$percent_change)),
c("subject_id", "frame")],
m.post_smoker_gam$residuals)
names(post_smoker_gam.resid) <- c("subject_id", "frame", "resid")
# post_smoker_gam.resid$frame_char <- str_pad(post_smoker_gam.resid$frame,
# width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_smoker_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_smoker_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_smoker_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_smoker_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
smoker.gam.df <- merge(smoker.gam.df, Phi_mat,
by = "frame",
all.x = T)
smoker.gam.df <- smoker.gam.df[order(smoker.gam.df$subject_id, smoker.gam.df$frame), ]
smoker.gam.df$subject_id <- as.factor(smoker.gam.df$subject_id)
post_smoker_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = smoker.gam.df, discrete = TRUE)
summary(post_smoker_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker,
## k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3286 0.9645 -10.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.31 28.29 114.24 <2e-16 ***
## s(sind):smoker 19.49 22.78 66.85 <2e-16 ***
## s(subject_id):Phi1 82.65 84.00 24462.37 <2e-16 ***
## s(subject_id):Phi2 82.26 84.00 2954.85 <2e-16 ***
## s(subject_id):Phi3 82.56 84.00 943.48 <2e-16 ***
## s(subject_id):Phi4 81.70 84.00 1213.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.965 Deviance explained = 96.6%
## fREML = 63449 Scale est. = 2.6539 n = 32642
Plot difference between smoker and non-smoker
post_smoker_gam.coef <- post_smoker_gam.fri$coefficients
post_smoker_gam.cov <- vcov.gam(post_smoker_gam.fri)
df_pred_non <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" =0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = smoker.gam.df$subject_id[1])
lpmat_non <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_smk <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = smoker.gam.df$subject_id[1])
lpmat_smk <- predict(post_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "lpmatrix")
pred_df_smk <- data.frame(seconds = rep(seq(0, 400), 2)/30,
user = c(rep("no use", 401), rep("cannabis use", 401)),
mean = c(lpmat_non %*% post_smoker_gam.coef,
lpmat_smk %*% post_smoker_gam.coef),
se = c(sqrt(diag(lpmat_non %*% post_smoker_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_smk %*% post_smoker_gam.cov %*% t(lpmat_smk)))),
stringsAsFactors = F)
post_smkProfile <- ggplot(data = pred_df_smk, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
scale_y_continuous(limits = c(min(pred_df_smk$mean - 2*pred_df_smk$se),
max(pred_df_smk$mean+2*pred_df_smk$se)))+
labs(y = "Percent Change")+
theme_bw()
post_smkProfile
legend_smkProfile <- g_legend(post_smkProfile)
Manuscript plot of mean trajectories for smokers and all user categories
user_col <- viridis(15)
post_allProfile <- ggplot(data = pred_df_user, aes(x = seconds, y = mean, group = user, col = as.factor(user)))+
geom_line()+
geom_line(data = pred_df_smk[pred_df_smk$user == "cannabis use", ],
aes(x = seconds, y = mean),
linetype = "longdash", col = user_col[12])+
scale_color_manual(values = c(user_col[c(2, 13, 11)]),
breaks = c("no use", "occasional use", "daily use"))+
scale_x_continuous(expand = c(0,0), limits = c(0, 10),
breaks = c(0,2,4,6, 8, 10))+
labs(y = "Percent change in pupil response",
x = "Seconds from start of light test",
colour = "")+
theme_bw()+
theme(legend.position = c(0.85,0.8),
legend.text = element_text(size = 12))
post_allProfile
final.fosr <- grid.arrange(post_allProfile, post_occ_coef,
post_dly_coef, post_dlyocc_coef,
layout_matrix = rbind(c(1,2),
c(3,4)))
labeled.fosr.plot <- as_ggplot(final.fosr) +
draw_plot_label(label = c("A", "B", "C", "D"), size = 15,
x = c(0, 0.5, 0, 0.5), y = c(1, 1, 0.5, 0.5))
labeled.fosr.plot